## ---------------------------------------
## MAIN ANALYSES
## Replication file for Balcells, Laia and Francisco Villamil (2020)
## 'The double logic of internal purges: New evidence from Francoist Spain.'
## Nationalism and Ethnic Politics, 26(3) (forthcoming)
## Revised: July 2020
## ---------------------------------------

if(!grepl("replication$", getwd())){
  print("Choose any file in the replication folder (e.g. analyses.R)")
  dir = file.choose()
  setwd(gsub("replication(/.*)$", "replication", dir))
}

options(stringsAsFactors = FALSE)
pkg = c("ggplot2", "ggstance", "stargazer", "margins",
  "mice", "miceadds", "sandwich", "dplyr", "tidyr")
if(!all(pkg %in% rownames(installed.packages()))){
  pkg_no = pkg[!pkg %in% rownames(installed.packages())]
  warning(paste0("Installing: ", paste(pkg_no, collapse = ", ")), immediate. = TRUE)
  install.packages(pkg_no)
  }
lapply(pkg, require, character.only = TRUE)

wgt__ = NULL # weird issue: https://github.com/alexanderrobitzsch/miceadds/issues/18

# Load data
data = read.csv("dataset.csv")

## Preparing variables

# Postwar dummy after April 1, 1930
data$postwar = ifelse(as.Date(data$BOP) > "1939-04-01", 1, 0)
# Population-weighted victimization variables
data$executed_right_l = log(data$executed_right / data$p1930 * 1000 + 1)
# Logged 1930 population
data$p1930_l = log(data$p1930)

# Quiet function
quiet = function(x){
  sink(tempfile())
  on.exit(sink())
  invisible(force(x))
}

# Function to get clustered SEs
cluster_se = function(original_model){
  m_cse = glm.cluster(data = data, formula = formula(original_model),
      cluster = "muni_code", family = "binomial")
  if(!all(coefficients(original_model) == coef(m_cse))){
    stop(paste0("Different coefficients?! for model: ",
      as.character(formula(original_model)[3])))}
  return(quiet(summary(m_cse)[, "Std. Error"]))
}

# Function to get predicted probabilities using clustered VCOV
glm_pred = function(model, newdata, return = "pp",
  prov = TRUE, prov_list = c("albacete", "barcelona", "girona",
    "huesca", "lleida", "asturias", "tarragona", "bizkaia")){

  if(prov){
    # Add rows - factor issue with model.matrix
    prov_n = length(prov_list)
    newdata = rbind(newdata[rep(1, prov_n),], newdata)
    newdata$provincia = factor(c(prov_list,
      as.character(newdata$provincia)[(prov_n+1):nrow(newdata)]))
  }
  # Model matrix and coefficients
  m_mat = model.matrix(delete.response(model$glm_res$terms), data = newdata)
  m_coef = model$glm_res$coef
  if(prov){
    # Remove extra rows
    m_mat = m_mat[(prov_n+1):nrow(m_mat),]
    newdata = newdata[(prov_n+1):nrow(newdata),]
  }
  # Calculate
  fit = as.vector(m_mat %*% m_coef)
  se_fit = sqrt(diag(m_mat %*% model$vcov %*% t(m_mat)))
  # Return
  if(return == "pp"){
    return(family(model$glm_res)$linkinv(fit))
  } else if (return == "se"){
    return(se_fit)
  } else if (return == "upper"){
    return(family(model$glm_res)$linkinv(fit + 1.96 * se_fit))
  } else if (return == "lower"){
    return(family(model$glm_res)$linkinv(fit - 1.96 * se_fit))
  } else {stop("Return what?")}

}

### Analyses ###

## 1. Targeting teachers from leftist municipalities

m1 = glm(inhabilitacion ~ left36 + postwar +
  executed_right_l + sindic31_bin + p1930_l + factor(provincia),
  data = data, family = "binomial")
m2 = glm(inhabilitacion ~ left36 * postwar +
  executed_right_l + sindic31_bin + p1930_l + factor(provincia),
  data = data, family = "binomial")
m3 = glm(confirmacion ~ left36 + postwar +
  executed_right_l + sindic31_bin + p1930_l + factor(provincia),
  data = data, family = "binomial")
m4 = glm(confirmacion ~ left36 * postwar +
  executed_right_l + sindic31_bin + p1930_l + factor(provincia),
  data = data, family = "binomial")

stargazer(m1, m2, m3, m4, type = "html",
  title = "Logistic regression on purges commission's final resolution",
  omit = "provincia", intercept.bottom = FALSE,
  dep.var.labels = c("Removal", "Confirmation"),
  covariate.labels = c("(Intercept)", "Leftist support 1936",
    "Postwar period", "Rightist victimization",
    "Trade Unions prewar", "Log. Population 1930", "Left 1936 x Postwar"),
  star.char = c("+", "*", "**", "***"),
  star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
  se = list(cluster_se(m1), cluster_se(m2), cluster_se(m3), cluster_se(m4)),
  omit.stat = c("ll"),
  notes = "Note: + p < 0.1; * p < 0.05; ** p < 0.01; *** p < 0.001. Province fixed effects not shown. Standard errors clustered at the level of municipalities. Data for the Basque Country includes only data from the province of Bizkaia.",
  notes.append = FALSE,
  out = "table_leftist.doc"
)


# Predicted probabilities plot

ndf = expand.grid(left36 = seq(0, 1, 0.1), postwar = 0:1)
ndf$p1930_l = mean(data$p1930_l[!duplicated(data$muni_code)], na.rm = T)
ndf$executed_right_l = mean(data$executed_right_l[!duplicated(data$muni_code)], na.rm = T)
ndf$sindic31_bin = 0
ndf$provincia = factor("albacete")

m2b = glm.cluster(data=data, formula=formula(m2), cluster="muni_code", family="binomial")
m4b = glm.cluster(data=data, formula=formula(m4), cluster="muni_code", family="binomial")

ndf$pp = glm_pred(m2b, ndf, return = "pp")
ndf$ul = glm_pred(m2b, ndf, return = "upper")
ndf$ll = glm_pred(m2b, ndf, return = "lower")
ndf$pp_conf = glm_pred(m4b, ndf, return = "pp")
ndf$ul_conf = glm_pred(m4b, ndf, return = "upper")
ndf$ll_conf = glm_pred(m4b, ndf, return = "lower")

ndf$postwar = factor(ndf$postwar)
levels(ndf$postwar) = c("During the war", "After war ends")

pdf("pp_inhab_indiv.pdf", width = 6, height = 5)
ggplot(ndf, aes(x = left36, y = pp)) +
  geom_line() +
  geom_ribbon(aes(ymin = ll, ymax = ul), alpha = 0.2) +
  facet_wrap(~postwar, scales = "free") +
  scale_y_continuous(limits = c(0,1)) +
  theme_classic() +
  theme(panel.background = element_blank(),
        panel.grid = element_blank(),
        panel.grid.major.y = element_line(size = 0.25, color = gray(0.95)),
        axis.ticks.y = element_blank(),
        axis.title = element_text(size = 12),
        panel.border = element_blank(),
        strip.text = element_text(size = 12),
        plot.caption = element_text(size = 9, hjust = 0, margin = margin(t = 15)),
        # plot.margin = unit(c(1,2,1,2), "cm"),
        strip.background = element_blank()) +
  labs(x = "\nLeftist support 1936 in municipality of origin",
    y = "Probability of removal\n")
dev.off()

pdf("pp_conf_indiv.pdf", width = 6, height = 5)
ggplot(ndf, aes(x = left36, y = pp_conf)) +
  geom_line() +
  geom_ribbon(aes(ymin = ll_conf, ymax = ul_conf), alpha = 0.2) +
  facet_wrap(~postwar, scales = "free") +
  scale_y_continuous(limits = c(0,1)) +
  theme_classic() +
  theme(panel.background = element_blank(),
        panel.grid = element_blank(),
        panel.grid.major.y = element_line(size = 0.25, color = gray(0.95)),
        axis.ticks.y = element_blank(),
        axis.title = element_text(size = 12),
        panel.border = element_blank(),
        strip.text = element_text(size = 12),
        plot.caption = element_text(size = 9, hjust = 0, margin = margin(t = 15)),
        # plot.margin = unit(c(1,2,1,2), "cm"),
        strip.background = element_blank()) +
  labs(x = "\nLeftist support 1936 in municipality of origin",
    y = "Probability of confirmation\n")
dev.off()

## 2. Targeting teachers with Basque/Catalan family names

## EUK & CAT
m5 = glm(inhabilitacion ~ ape_euk_any + gender + sindic31_bin +
  left36 + p1930_l + executed_right_l,
  family = "binomial", data = data)
m6 = glm(confirmacion ~ ape_euk_any + gender + sindic31_bin +
  left36 + p1930_l + executed_right_l,
  family = "binomial", data = data)
m7 = glm(traslado_region ~ ape_euk_any + gender + sindic31_bin +
  left36 + p1930_l + executed_right_l,
  family = "binomial", data = data)
m8 = glm(inhabilitacion ~ ape_cat_any + gender + sindic31_bin +
  left36 + p1930_l + executed_right_l + factor(provincia),
  family = "binomial", data = data)
m9 = glm(confirmacion ~ ape_cat_any + gender + sindic31_bin +
  left36 + p1930_l + executed_right_l + factor(provincia),
  family = "binomial", data = data)
m10 = glm(traslado_region ~ ape_cat_any + gender + sindic31_bin +
  left36 + p1930_l + executed_right_l + factor(provincia),
  family = "binomial", data = data)

names(m5$coefficients)[names(m5$coefficients) == "ape_euk_any"] = "ape_e_c_any"
names(m6$coefficients)[names(m6$coefficients) == "ape_euk_any"] = "ape_e_c_any"
names(m7$coefficients)[names(m7$coefficients) == "ape_euk_any"] = "ape_e_c_any"
names(m8$coefficients)[names(m8$coefficients) == "ape_cat_any"] = "ape_e_c_any"
names(m9$coefficients)[names(m9$coefficients) == "ape_cat_any"] = "ape_e_c_any"
names(m10$coefficients)[names(m10$coefficients) == "ape_cat_any"] = "ape_e_c_any"

cluster_se_ape = list(cluster_se(m5), cluster_se(m6), cluster_se(m7),
  cluster_se(m8), cluster_se(m9), cluster_se(m10))
cluster_se_ape = lapply(cluster_se_ape, function(x){
  names(x)[names(x) %in% c("ape_cat_any", "ape_euk_any")] = "ape_e_c_any"
  return(x)})

stargazer(m5, m6, m7, m8, m9, m10, type = "html",
  title = "Logistic regression on purges commission's final resolution",
  omit = "provincia", intercept.bottom = FALSE,
  dep.var.labels = rep(c("Removal", "Confirmation", "Relocation region"), 2),
  column.labels   = c("Basque Country", "Catalonia"), column.separate = c(3, 3),
  covariate.labels = c("(Intercept)",
    "Basque/Cat name", "Male",
    "Trade Unions", "Left 1936",
    "Log. Pop. 1930", "Rightist vict."),
  star.char = c("+", "*", "**", "***"),
  star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
  se = cluster_se_ape,
  omit.stat = c("ll"),
  notes = "Note: + p < 0.1; * p < 0.05; ** p < 0.01; *** p < 0.001. Province fixed effects not shown. Standard errors clustered at the level of municipalities. Data for the Basque Country includes only data from the province of Bizkaia.",
  notes.append = FALSE,
  out = "table_family_names.doc"
)


m11 = glm(A_militancia ~ ape_euk_any + gender + sindic31_bin +
  left36 + p1930_l + executed_right_l,
  family = "binomial", data = data)
m12 = glm(B_nacionalismo ~ ape_euk_any + gender + sindic31_bin +
  left36 + p1930_l + executed_right_l,
  family = "binomial", data = data)
m13 = glm(C_actitudesCN ~ ape_euk_any + gender + sindic31_bin +
  left36 + p1930_l + executed_right_l,
  family = "binomial", data = data)
m14 = glm(D_izquierdas ~ ape_euk_any + gender + sindic31_bin +
  left36 + p1930_l + executed_right_l,
  family = "binomial", data = data)
m15 = glm(A_militancia ~ ape_cat_any + gender + sindic31_bin +
  left36 + p1930_l + executed_right_l + factor(provincia),
  family = "binomial", data = data)
m16 = glm(B_nacionalismo ~ ape_cat_any + gender + sindic31_bin +
  left36 + p1930_l + executed_right_l + factor(provincia),
  family = "binomial", data = data)
m17 = glm(C_actitudesCN ~ ape_cat_any + gender + sindic31_bin +
  left36 + p1930_l + executed_right_l + factor(provincia),
  family = "binomial", data = data)
m18 = glm(D_izquierdas ~ ape_cat_any + gender + sindic31_bin +
  left36 + p1930_l + executed_right_l + factor(provincia),
  family = "binomial", data = data)

names(m11$coefficients)[names(m11$coefficients) == "ape_euk_any"] = "ape_e_c_any"
names(m12$coefficients)[names(m12$coefficients) == "ape_euk_any"] = "ape_e_c_any"
names(m13$coefficients)[names(m13$coefficients) == "ape_euk_any"] = "ape_e_c_any"
names(m14$coefficients)[names(m14$coefficients) == "ape_euk_any"] = "ape_e_c_any"
names(m15$coefficients)[names(m15$coefficients) == "ape_cat_any"] = "ape_e_c_any"
names(m16$coefficients)[names(m16$coefficients) == "ape_cat_any"] = "ape_e_c_any"
names(m17$coefficients)[names(m17$coefficients) == "ape_cat_any"] = "ape_e_c_any"
names(m18$coefficients)[names(m18$coefficients) == "ape_cat_any"] = "ape_e_c_any"

cluster_se_ape_c_euk = list(cluster_se(m11), cluster_se(m12),
  cluster_se(m13), cluster_se(m14))
cluster_se_ape_c_euk = lapply(cluster_se_ape_c_euk, function(x){
  names(x)[names(x) %in% c("ape_cat_any", "ape_euk_any")] = "ape_e_c_any"
  return(x)})

cluster_se_ape_c_cat = list(cluster_se(m15), cluster_se(m16),
  cluster_se(m17), cluster_se(m18))
cluster_se_ape_c_cat = lapply(cluster_se_ape_c_cat, function(x){
  names(x)[names(x) %in% c("ape_cat_any", "ape_euk_any")] = "ape_e_c_any"
  return(x)})


stargazer(m11, m12, m13, m14, type = "html",
  title = "Logistic regression on charges against teachers in the Basque Country",
  intercept.bottom = FALSE,
  dep.var.labels = c("{\\footnotesize Political part.}", "{\\footnotesize Nationalism}",
    "{\\footnotesize Ag. Causa Nacional}", "{\\footnotesize Leftist symp.}"),
  covariate.labels = c("(Intercept)",
    "Basque name", "Male",
    "Trade Unions prewar", "Leftist support 1936",
    "Log. Population 1930", "Rightist victimization"),
  star.char = c("+", "*", "**", "***"),
  star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
  omit.stat = c("ll"),
  se = cluster_se_ape_c_euk,
  notes = "Note: + p < 0.1; * p < 0.05; ** p < 0.01; *** p < 0.001. Province fixed effects not shown. Standard errors clustered at the level of municipalities. Data for the Basque Country includes only data from the province of Bizkaia.",
  notes.append = FALSE,
  out = "table_family_names_charges_euk.doc"
)

stargazer(m15, m16, m17, m18, type = "html",
  title = "Logistic regression on charges against teachers in Catalonia",
  intercept.bottom = FALSE,
  omit = "provincia",
  dep.var.labels = c("{\\footnotesize Political part.}", "{\\footnotesize Nationalism}",
    "{\\footnotesize Ag. Causa Nacional}", "{\\footnotesize Leftist symp.}"),
  covariate.labels = c("(Intercept)",
    "Catalan name", "Male",
    "Trade Unions prewar", "Leftist support 1936",
    "Log. Population 1930", "Rightist victimization"),
  star.char = c("+", "*", "**", "***"),
  star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
  omit.stat = c("ll"),
  se = cluster_se_ape_c_cat,
  notes = "Note: + p < 0.1; * p < 0.05; ** p < 0.01; *** p < 0.001. Province fixed effects not shown. Standard errors clustered at the level of municipalities.",
  notes.append = FALSE,
  out = "table_family_names_charges_cat.doc"
)

# Clustered SE models for outcomes
m5b = glm.cluster(data = data, formula = formula(m5),
  cluster = "muni_code", family = "binomial")
m6b = glm.cluster(data = data, formula = formula(m6),
  cluster = "muni_code", family = "binomial")
m7b = glm.cluster(data = data, formula = formula(m7),
  cluster = "muni_code", family = "binomial")
m8b = glm.cluster(data = data, formula = formula(m8),
  cluster = "muni_code", family = "binomial")
m9b = glm.cluster(data = data, formula = formula(m9),
  cluster = "muni_code", family = "binomial")
m10b = glm.cluster(data = data, formula = formula(m10),
  cluster = "muni_code", family = "binomial")
# Marginal effects
mar_eff_outcomes = rbind(
  cbind(summary(margins(m5b[[1]], variables = "ape_euk_any", vcov = m5b$vcov)),
    outcome = "Removal"),
  cbind(summary(margins(m6b[[1]], variables = "ape_euk_any", vcov = m6b$vcov)),
    outcome = "Confirmation"),
  cbind(summary(margins(m7b[[1]], variables = "ape_euk_any", vcov = m7b$vcov)),
    outcome = "Relocation"),
  cbind(summary(margins(m8b[[1]], variables = "ape_cat_any", vcov = m8b$vcov)),
    outcome = "Removal"),
  cbind(summary(margins(m9b[[1]], variables = "ape_cat_any", vcov = m9b$vcov)),
    outcome = "Confirmation"),
  cbind(summary(margins(m10b[[1]], variables = "ape_cat_any", vcov = m10b$vcov)),
    outcome = "Relocation"))

# Clustered SE models for charges
m11b = glm.cluster(data = data, formula = formula(m11),
  cluster = "muni_code", family = "binomial")
m12b = glm.cluster(data = data, formula = formula(m12),
  cluster = "muni_code", family = "binomial")
m13b = glm.cluster(data = data, formula = formula(m13),
  cluster = "muni_code", family = "binomial")
m14b = glm.cluster(data = data, formula = formula(m14),
  cluster = "muni_code", family = "binomial")
m15b = glm.cluster(data = data, formula = formula(m15),
  cluster = "muni_code", family = "binomial")
m16b = glm.cluster(data = data, formula = formula(m16),
  cluster = "muni_code", family = "binomial")
m17b = glm.cluster(data = data, formula = formula(m17),
  cluster = "muni_code", family = "binomial")
m18b = glm.cluster(data = data, formula = formula(m18),
  cluster = "muni_code", family = "binomial")
# Marginal effects
mar_eff_charges = rbind(
  cbind(summary(margins(m11b[[1]], variables = "ape_euk_any", vcov = m11b$vcov)),
    outcome = "Political\nparticipation"),
  cbind(summary(margins(m12b[[1]], variables = "ape_euk_any", vcov = m12b$vcov)),
    outcome = "Nationalism"),
  cbind(summary(margins(m13b[[1]], variables = "ape_euk_any", vcov = m13b$vcov)),
    outcome = "Attitudes against\nCausa Nacional"),
  cbind(summary(margins(m14b[[1]], variables = "ape_euk_any", vcov = m14b$vcov)),
    outcome = "Leftist\nSympathies"),
  cbind(summary(margins(m15b[[1]], variables = "ape_cat_any", vcov = m15b$vcov)),
    outcome = "Political\nparticipation"),
  cbind(summary(margins(m16b[[1]], variables = "ape_cat_any", vcov = m16b$vcov)),
    outcome = "Nationalism"),
  cbind(summary(margins(m17b[[1]], variables = "ape_cat_any", vcov = m17b$vcov)),
    outcome = "Attitudes against\nCausa Nacional"),
  cbind(summary(margins(m18b[[1]], variables = "ape_cat_any", vcov = m18b$vcov)),
    outcome = "Leftist\nSympathies"))

mar_eff_outcomes$factor = factor(mar_eff_outcomes$factor)
levels(mar_eff_outcomes$factor) = c("Catalonia", "Basque Country")

pdf("mareffects_outcomes.pdf", width = 5, height = 5)
ggplot(mar_eff_outcomes, aes(x = AME, y = outcome, group = factor, color = factor)) +
  geom_point(position = position_dodgev(height = 1/5)) +
  geom_errorbarh(aes(xmin = lower, xmax = upper), height = 0,
    position = position_dodgev(height = 1/5)) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_color_manual(values = c("#d90024", "#0260e3")) +
  theme_classic() +
  theme(panel.background = element_blank(),
        panel.grid = element_blank(),
        panel.grid.major.y = element_line(size = 0.25, color = gray(0.95)),
        axis.title = element_text(size = 10),
        panel.border = element_blank(),
        strip.text = element_text(size = 12),
        plot.caption = element_text(size = 9, hjust = 0, margin = margin(t = 15)),
        legend.position = c(0.8, 0.1), legend.title = element_blank(),
        strip.background = element_blank()) +
  labs(y = "Final outcome\n",
    x = "\nAME of having a Basque/Catalan family name")
dev.off()

mar_eff_charges$factor = factor(mar_eff_charges$factor)
levels(mar_eff_charges$factor) = c("Catalonia", "Basque Country")

pdf("mareffects_charges.pdf", width = 5, height = 5)
ggplot(mar_eff_charges, aes(x = AME, y = outcome, group = factor, color = factor)) +
  geom_point(position = position_dodgev(height = 1/5)) +
  geom_errorbarh(aes(xmin = lower, xmax = upper), height = 0,
    position = position_dodgev(height = 1/5)) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_color_manual(values = c("#d90024", "#0260e3")) +
  theme_classic() +
  theme(panel.background = element_blank(),
        panel.grid = element_blank(),
        panel.grid.major.y = element_line(size = 0.25, color = gray(0.95)),
        axis.title = element_text(size = 10),
        panel.border = element_blank(),
        plot.caption = element_text(size = 9, hjust = 0, margin = margin(t = 15)),
        legend.position = c(0.8, 0.1), legend.title = element_blank(),
        strip.background = element_blank()) +
  labs(y = "Charge accused of\n",
    x = "\nAME of having a Basque/Catalan family name")
dev.off()
